## Input for chromVAR is a list of deduplicated single-cell mapped (bam) files and a narrowPeak file.

##For Clustering cells based on Jaspar Motif Enrichment:

## ---- Setup: packages, RNG seed and parallel backend ----
library(chromVAR)
library(motifmatchr)
library(Matrix)
library(SummarizedExperiment)
library(BiocParallel)

## Fix the seed, then register a multicore backend (8 workers, progress bar on).
set.seed(2017)
register(MulticoreParam(8, progressbar = TRUE))

## ---- Peaks ----
## Make sure to remove non-linear sequences (especially those located in Chr17
## for Humans or ChrX for Mouse) from the peak file before loading it into R.
peakfile <- "MACs2.narrowPeak"
peaks <- readNarrowpeaks(
  peakfile,
  width = 500,
  non_overlapping = TRUE
)

## ---- Per-cell inputs ----
## Cells.txt is the list of bam files; all of them are located in the same
## working directory.
bamfiles <- scan("Cells.txt", what = character())

## Celltypes.txt is the annotation for the bam files in Cells.txt and should be
## in the same order.
celltypes <- scan("Celltypes.txt", what = character())

## ---- Fragment counts per peak ----
fragment_counts <- getCounts(
  bamfiles,
  peaks,
  paired = TRUE,
  by_rg = TRUE,
  format = "bam",
  colData = DataFrame(celltype = celltypes)
)

## ---- GC bias ----
## For Mouse, load BSgenome.Mmusculus.UCSC.mm9 and pass it as `genome` instead.
library(BSgenome.Hsapiens.UCSC.hg19)
fragment_counts <- addGCBias(
  fragment_counts,
  genome = BSgenome.Hsapiens.UCSC.hg19
)

## ---- Sample and peak filtering ----
counts_filtered <- filterSamples(fragment_counts, shiny = FALSE)

## You can obtain the proportion of library fragments in peaks for each library
## from this plot.
filtering_plot <- filterSamplesPlot(fragment_counts, use_plotly = FALSE)
filtering_plot

counts_filtered <- filterPeaks(counts_filtered, non_overlapping = TRUE)

## ---- Motif matching ----
## Packages used by the motif-matching step.
library(motifmatchr)
library(SummarizedExperiment)
library(Matrix)

## For Mouse use getJasparMotifs(species="Mus musculus").
jaspar_motifs <- getJasparMotifs()

## For Mouse pass genome = BSgenome.Mmusculus.UCSC.mm9.
motif_ix <- matchMotifs(
  jaspar_motifs,
  counts_filtered,
  genome = BSgenome.Hsapiens.UCSC.hg19
)

## ---- Deviations and variability ----
dev <- computeDeviations(
  object = counts_filtered,
  annotations = motif_ix
)

variability <- computeVariability(dev)
plotVariability(variability, use_plotly = FALSE)

## Show the variability table, then save it as a tab-separated file.
variability
write.table(
  variability,
  file = "Variability.txt",
  sep = "\t",
  col.names = NA,
  quote = FALSE
)

## ---- Sample correlation heatmap ----
sample_cor <- getSampleCorrelation(dev)

library(pheatmap)
## The same 1 - correlation distance is used to cluster rows and columns.
sample_dist <- as.dist(1 - sample_cor)
pheatmap(
  as.dist(sample_cor),
  annotation_row = colData(dev),
  clustering_distance_rows = sample_dist,
  clustering_distance_cols = sample_dist
)

## ---- tSNE on deviations ----
tsne_results <- deviationsTsne(
  dev,
  threshold = 1,
  perplexity = 10,
  shiny = FALSE
)

## Replace "Motif Of Interest" with the motif to colour the cells by.
tsne_plots <- plotDeviationsTsne(
  dev,
  tsne_results,
  annotation = "Motif Of Interest",
  sample_column = "celltype",
  shiny = FALSE
)


##For Clustering cells based on NMF Cluster-specific peaks:

## ---- Setup: packages, RNG seed and parallel backend ----
library(chromVAR)
library(motifmatchr)
library(Matrix)
library(SummarizedExperiment)
library(BiocParallel)

## Reset the seed and register a multicore backend (8 workers, progress bar on).
set.seed(2017)
register(MulticoreParam(8, progressbar = TRUE))

## ---- Peaks ----
## Make sure to remove non-linear sequences (especially those located in Chr17
## for Humans or ChrX for Mouse) from the peak file before loading it into R.
peakfile <- "MACs2.narrowPeak"
peaks <- readNarrowpeaks(
  peakfile,
  width = 500,
  non_overlapping = TRUE
)

## ---- Per-cell inputs ----
## Cells.txt is the list of bam files; all of them are located in the same
## working directory.
bamfiles <- scan("Cells.txt", what = character())

## Celltypes.txt is the annotation for the bam files in Cells.txt and should be
## in the same order.
celltypes <- scan("Celltypes.txt", what = character())

## ---- Fragment counts per peak ----
fragment_counts <- getCounts(
  bamfiles,
  peaks,
  paired = TRUE,
  by_rg = TRUE,
  format = "bam",
  colData = DataFrame(celltype = celltypes)
)

## ---- GC bias ----
## For Mouse, load BSgenome.Mmusculus.UCSC.mm9 and pass it as `genome` instead.
library(BSgenome.Hsapiens.UCSC.hg19)
fragment_counts <- addGCBias(
  fragment_counts,
  genome = BSgenome.Hsapiens.UCSC.hg19
)

## ---- Sample and peak filtering ----
counts_filtered <- filterSamples(fragment_counts, shiny = FALSE)

## You can obtain the proportion of library fragments in peaks for each library
## from this plot.
filtering_plot <- filterSamplesPlot(fragment_counts, use_plotly = FALSE)
filtering_plot

counts_filtered <- filterPeaks(counts_filtered, non_overlapping = TRUE)

## ---- Annotation-based deviations (NMF cluster-specific peaks) ----
## Packages used by the annotation step.
library(motifmatchr)
library(SummarizedExperiment)
library(Matrix)

## `my_annotation_file` is not set anywhere in this script, so it must be
## defined by the user beforehand. Fail early with a clear message rather than
## with an "object not found" error raised from inside getAnnotations().
if (!exists("my_annotation_file")) {
  stop(
    "Set `my_annotation_file` to the path of the peak annotation file ",
    "before running this section.",
    call. = FALSE
  )
}

## `counts_filtered` is the filtered count object built above. It is used for
## both rowRanges() and computeDeviations() so that the annotation matrix and
## the counts refer to the same set of peaks.
## column = 4 picks the column of the annotation file that holds the group
## label -- presumably the NMF cluster name; confirm against your file.
anno_ix <- getAnnotations(
  my_annotation_file,
  rowRanges = rowRanges(counts_filtered),
  column = 4
)
anno_ix

dev <- computeDeviations(object = counts_filtered, annotations = anno_ix)

variability <- computeVariability(dev)
plotVariability(variability, use_plotly = FALSE)

## NOTE: the Jaspar motif workflow writes to the same file name, so running both
## workflows in one working directory overwrites the earlier table.
write.table(
  variability,
  file = "Variability.txt",
  sep = "\t",
  col.names = NA,
  quote = FALSE
)

## Sample-sample correlation of the deviations (one call is enough).
sample_cor <- getSampleCorrelation(dev)
library(pheatmap)

tsne_results <- deviationsTsne(
  dev,
  threshold = 1,
  perplexity = 10,
  shiny = FALSE
)

## Replace the placeholder with the annotation (NMF cluster) to colour by.
## The name is kept on a single line so it contains no embedded newline.
tsne_plots <- plotDeviationsTsne(
  dev,
  tsne_results,
  annotation = "NMF Cluster of Interest",
  sample_column = "celltype",
  shiny = FALSE
)